# R script for replicating results in appendix section "Randomisation Checks"
# Zhai & Garside 2022
# original device: MacPro 13, R 4.1.2
# recommended working directory: Desktop

setwd("~/Desktop") # default wkd
rm(list = ls())

# pkgs --------------------------------------------------------------------

if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, magrittr, fixest, estimatr, ggpubr, knitr, kableExtra)

# helpers -----------------------------------------------------------------

source("R_00_helpers.R")

# figure 5 ----------------------------------------------------------------

## data ----
df.covars <- read.csv("data_covariates.csv", row.names = 1) 
df.flood <- read.csv("data_vote_ts.csv", row.names = 1) %>% 
  filter(date==max(date)) %>% 
  select(gen_id, gen_name, contains("flood"), contains("severe"))
df.fcovars <- left_join(df.flood, df.covars, by = "gen_id") %>% 
  select(-gen_name.y) %>% 
  rename(gen_name = gen_name.x) 
df.fcovars.std <- df.fcovars %>% 
  mutate(across(pop_per_sqkm:land_agri_pct, ~scale(.x, center = TRUE, scale = TRUE)))
df.fcovars.s4 <- df.fcovars %>% 
  group_by(land_name) %>% 
  mutate(flooded_land = max(ifelse(flooded==1,1,0))) %>% 
  ungroup() %>% 
  filter(flooded_land == 1) %>% 
  mutate(flooded_land = factor(flooded_land))
df.fcovars.std.s4 <- df.fcovars.s4 %>% 
  mutate(across(pop_per_sqkm:land_agri_pct, ~scale(.x, center = TRUE, scale = TRUE)))

## test ----

### XX = distance ----
btest.dist <- fixest::feols(
  data = filter(df.fcovars.s4, year==2020) %>% mutate(distance=scale(distance)),
  distance~factor(flooded),
  cluster = "land_name") %>% 
  tidy_btest_distance()
btest.dist.x <- fixest::feols(
  data = filter(df.fcovars.s4, year==2020) %>% mutate(distance=scale(distance)),
  distance~factor(flooded2),
  cluster = "land_name") %>% 
  tidy_btest_distance()
btest0.dist <- fixest::feols(
  data = filter(df.fcovars, year==2020) %>% mutate(distance=scale(distance)),
  distance~factor(flooded),
  cluster = "land_name") %>% 
  tidy_btest_distance()

btest0.dist.x <- fixest::feols(
  data = filter(df.fcovars, year==2020) %>% mutate(distance=scale(distance)),
  distance~factor(flooded2),
  cluster = "land_name") %>% 
  tidy_btest_distance()

### XX = all ----
id.covars <- c("pop_per_sqkm", "net_out_per_thousand", "old_share", "income_mean", "unemployed_rate", "land_set_pct", "land_agri_pct", "incumbent")
btest.out <- lapply(id.covars, function(x) test_balance(var_id = x, df = df.fcovars.std.s4)) %>% 
  tidy_best_all(btest.dist, var_id = id.covars)
btest.out.x <- lapply(id.covars, function(x) test_balance(var_id = x, df = df.fcovars.std.s4, d = "flooded2")) %>% 
  tidy_best_all(btest.dist, var_id = id.covars)
btest0.out <- lapply(id.covars, function(x) test_balance(var_id = x, df = df.fcovars.std)) %>% 
  tidy_best_all(btest0.dist, var_id = id.covars)
btest0.out.x <- lapply(id.covars, function(x) test_balance(var_id = x, df = df.fcovars.std, d = "flooded2")) %>% 
  tidy_best_all(btest0.dist, var_id = id.covars)

### results ----
btest.all <- list("sub" = btest.out, "all" = btest0.out)
btest.all.x <- list("sub" = btest.out.x, "all" = btest0.out.x)
btests <- list("gov" = btest.all, "satellite" = btest.all.x)

## plot ----
btests %>% 
  map(~bind_rows(.x, .id = "sample")) %>% 
  map(~rename(.x, "n" =1, "z"=2, "x"=3, "coef"=4, "se"=5, "t"=6, "p"=7)) %>% 
  bind_rows(.id = "D") %>% 
  mutate(
    z = case_when(
      z == "pop_per_sqkm" ~ "Population Density",
      z == "net_out_per_thousand" ~ "Population Outflow",
      z == "old_share" ~ "Share of Elderly",
      z == "income_mean" ~ "Income",
      z == "unemployed_rate" ~ "Unemployment Rate",
      z == "land_set_pct" ~ "Land Use (Settled)",
      z == "land_agri_pct" ~ "Land Use (Agricultural)",
      z == "incumbent" ~  "Green in Land",
      z == "distance" ~ "Distance to Flooded Area"
    ) %>% 
      factor(levels = c("Population Density", "Population Outflow", "Share of Elderly",
                        "Income", "Unemployment Rate", 
                        "Land Use (Settled)", "Land Use (Agricultural)", 
                        "Green in Land", "Distance to Flooded Area")),
    x = case_when(
      x == "factor(flooded)1:factor(year)2012" | x == "factor(flooded2)1:factor(year)2012" ~ "Treated X 2013",
      x == "factor(flooded)1:factor(year)2016" | x == "factor(flooded2)1:factor(year)2016" ~ "Treated X 2017",
      x == "factor(flooded)1:factor(year)2020" | x == "factor(flooded2)1:factor(year)2020" ~ "Treated X 2021",
      TRUE ~ "Treated X 2021"
    ),
    n = factor(n, levels = c("all", "sub"), labels = c("All states", "Flooded states")),
    D = factor(D, levels = c("gov", "satellite"), labels = c("Primary measure", "Secondary measure"))
  ) %>%  
  ggplot(aes(x = z, y = coef, color = x)) +
  geom_pointrange(aes(ymin = coef - 1.96*se, ymax = coef + 1.96*se), position = position_dodge(width = 0.7)) +
  facet_grid(D~n) +
  scale_x_discrete(labels = scales::label_wrap(width = 10)) +
  scale_y_continuous(limits = c(-2,2)) +
  scale_color_viridis_d() +
  coord_flip() +
  theme_bw() + labs_pubr() + 
  labs(title = "Balance test results, standardized differences in covariates",
       subtitle = "Municipality-level data, all (left) and flooded (right) states",
       x = "", y = "Std. Differences", color = "Lagged Treatment") +
  theme(legend.position = "top") 
ggsave("~/Desktop/appendix_figure5.pdf", width = 8, height = 10) 

# figures 6-7 -------------------------------------------------------------

## data ----
df.trend <- read.csv("data_vote_ts.csv", row.names = 1) %>% 
  mutate(
    flooded = factor(flooded, levels = c(0,1), labels = c("No", "Yes")),
    severe = factor(severe, levels = c(0,1), labels = c("No", "Yes")),
    flooded2 = factor(flooded2, levels = c(0,1), labels = c("No", "Yes")),
    severe2 = factor(severe2, levels = c(0,1), labels = c("No", "Yes")),
    date = as.Date(date)) %>% 
  mutate(GMD = gen_id, Election  = date)
df.trend.s4 <- df.trend %>% 
  group_by(land_name) %>% 
  mutate(flooded_land = max(ifelse(flooded=="Yes",1,0))) %>% 
  ungroup() %>% 
  filter(flooded_land == 1) %>% 
  mutate(flooded_land = factor(flooded_land)) 

## mean|group ----
df.trend.m <- df.trend %>%
  group_by(flooded, date) %>% 
  summarise(v_green_pct_m = lm_robust(v_green_pct~1)$coefficients,
            v_green_pct_lwr = lm_robust(v_green_pct~1)$conf.low,
            v_green_pct_upr = lm_robust(v_green_pct~1)$conf.high)
df.trend.s4.m <- df.trend.s4 %>%
  group_by(flooded, date) %>% 
  summarise(v_green_pct_m = lm_robust(v_green_pct~1)$coefficients,
            v_green_pct_lwr = lm_robust(v_green_pct~1)$conf.low,
            v_green_pct_upr = lm_robust(v_green_pct~1)$conf.high)

## plot ----
ggplot(df.trend.s4, aes(x = factor(date), color = flooded, fill = flooded)) +
  geom_point(aes(x = factor(date), y = v_green_pct), alpha = 0.1, size = 0.3, 
             position = position_jitterdodge(dodge.width = 0.5)) +
  geom_errorbar(data = df.trend.s4.m,
                aes(y = v_green_pct_m, ymin = v_green_pct_lwr, ymax = v_green_pct_upr),
                width = 0.2, position = position_dodge(width = 0.5)) +
  geom_point(data = df.trend.s4.m, aes(y = v_green_pct_m),
             size = 4, position = position_dodge(width = 0.5)) +
  geom_line(data = df.trend.s4.m, 
            aes(y = v_green_pct_m, group = flooded),
            position = position_dodge(width = 0.5)) +
  scale_color_viridis_d(option = "H", begin = 0.1, end = 0.9) + 
  scale_fill_viridis_d(option = "H", begin = 0.1, end = 0.9) +
  scale_y_continuous(limits = c(0,0.25), breaks = scales::pretty_breaks(n = 5), labels = scales::percent) +
  labs(x = NULL, y = "% Green", 
       title = "Pre-treatment trend in Green party support, federal elections", 
       subtitle = "Municipality-level second-vote share, affected states, 2009-2017",
       color = "Affected?", fill = "Affected?") +
  theme_pubclean() + labs_pubr() +
  theme(plot.title = element_text(size = 16),
        plot.subtitle = element_text(size = 14))
ggsave("~/Desktop/appendix_figure6.pdf.pdf", width = 8, height = 6, device = cairo_pdf)

ggplot(df.trend, aes(x = factor(date), color = flooded, fill = flooded)) +
  geom_point(aes(x = factor(date), y = v_green_pct), alpha = 0.1, size = 0.3,
             position = position_jitterdodge(dodge.width = 0.5)) +
  geom_errorbar(data = df.trend.m,
                aes(y = v_green_pct_m, ymin = v_green_pct_lwr, ymax = v_green_pct_upr),
                width = 0.2, position = position_dodge(width = 0.5)) +
  geom_point(data = df.trend.m, aes(y = v_green_pct_m),
             size = 4, position = position_dodge(width = 0.5)) +
  geom_line(data = df.trend.m,
            aes(y = v_green_pct_m, group = flooded),
            position = position_dodge(width = 0.5)) +
  scale_color_viridis_d(option = "H", begin = 0.1, end = 0.9) +
  scale_fill_viridis_d(option = "H", begin = 0.1, end = 0.9) +
  scale_y_continuous(limits = c(0,0.25), breaks = scales::pretty_breaks(n = 5), labels = scales::percent) +
  labs(x = NULL, y = "% Green",
       title = "Pre-treatment trend in Green party support, federal elections", 
       subtitle = "Municipality-level second-vote share, all states, 2009-2017",
       color = "Affected?", fill = "Affected?") +
  theme_pubclean() + labs_pubr() +
  theme(plot.title = element_text(size = 16),
        plot.subtitle = element_text(size = 14))
ggsave("~/Desktop/appendix_figure7.pdf", width = 8, height = 6, device = cairo_pdf)

# table 2 -----------------------------------------------------------------

pt.raw <- test_pt(df = df.trend.s4, fml = v_green_pct~factor(flooded)*factor(date))
pt.adj <- test_pt(df = df.trend.s4, fml = v_green_pct~factor(flooded)*factor(date)+pop_per_sqkm+net_out_per_thousand+old_share+income_mean+unemployed_rate+land_set_pct+land_agri_pct+factor(incumbent)+distance*factor(date))
pt.raw.x <- test_pt(df = df.trend.s4, fml = v_green_pct~factor(flooded2)*factor(date))
pt.adj.x <- test_pt(df = df.trend.s4, fml = v_green_pct~factor(flooded2)*factor(date)+pop_per_sqkm+net_out_per_thousand+old_share+income_mean+unemployed_rate+land_set_pct+land_agri_pct+factor(incumbent)+distance*factor(date))

tb2 <- etable(
  list(pt.raw,pt.adj,pt.raw.x,pt.adj.x), 
  drop = c("!\\sx\\s", "distance"),
  digits = 3, digits.stats = 3,
  depvar = FALSE,
  dict = c(`factor(flooded)Yes` = "Treated", `factor(flooded2)Yes` = "Treated",`factor(date)2013-09-22` = "2013", `factor(date)2017-09-24` = "2017",
           gen_id = "Municipality", date = "Election"),
  signif.code = c("***" = 0.001, "**" = 0.01, "*" = 0.05),
  style.df = style.df(depvar.title = "", fixef.title = "", fixef.prefix = "FE: ", yesNo = "Yes")) %>% 
  kbl(col.names = rep(c("Base", "Full"), 2), caption = "Parallel trends test. Flooded states only.") %>% 
  add_header_above(header = c(" "=1, "Primary measure" = 2, "Secondary measure" = 2)) %>% 
  add_footnote(label = "Note: GMD = Gemeinden (municipality); covariate results omitted to save space and available from the authors; *** p<0.001, ** p<0.01, * p<0.05.") %>% 
  kable_classic_2()
save_kable(tb2, "~/Desktop/appendix_table2.pdf")

# table 3 -----------------------------------------------------------------

pt0.raw <- test_pt(df = df.trend, fml = v_green_pct~factor(flooded)*factor(date))
pt0.adj <- test_pt(df = df.trend, fml = v_green_pct~factor(flooded)*factor(date)+pop_per_sqkm+net_out_per_thousand+old_share+income_mean+unemployed_rate+land_set_pct+land_agri_pct+factor(incumbent)+distance*factor(date))
pt0.raw.x <- test_pt(df = df.trend, fml = v_green_pct~factor(flooded2)*factor(date))
pt0.adj.x <- test_pt(df = df.trend, fml = v_green_pct~factor(flooded2)*factor(date)+pop_per_sqkm+net_out_per_thousand+old_share+income_mean+unemployed_rate+land_set_pct+land_agri_pct+factor(incumbent)+distance*factor(date))

tb3 <- etable(
  list(pt0.raw,pt0.adj,pt0.raw.x,pt0.adj.x), 
  drop = c("!\\sx\\s", "distance"),
  digits = 3, digits.stats = 3,
  depvar = FALSE,
  dict = c(`factor(flooded)Yes` = "Treated", `factor(flooded2)Yes` = "Treated",`factor(date)2013-09-22` = "2013", `factor(date)2017-09-24` = "2017",
           gen_id = "Municipality", date = "Election"),
  signif.code = c("***" = 0.001, "**" = 0.01, "*" = 0.05),
  style.df = style.df(depvar.title = "", fixef.title = "", fixef.prefix = "FE: ", yesNo = "Yes")) %>% 
  kbl(col.names = rep(c("Base", "Full"), 2), caption = "Parallel trends test. All states.") %>% 
  add_header_above(header = c(" "=1, "Primary measure" = 2, "Secondary measure" = 2)) %>% 
  add_footnote(label = "Note: GMD = Gemeinden (municipality); covariate results omitted to save space and available from the authors; *** p<0.001, ** p<0.01, * p<0.05.") %>% 
  kable_classic_2()
save_kable(tb3, "~/Desktop/appendix_table3.pdf")

# cleanup -----------------------------------------------------------------

pacman::p_unload("all")
rm(list = ls())
